Last Updated: Sep-06-2021
Let’s do stuff with the Atlanta Crime Data. We’ll follow recommendations from codeforatlanta/apd-crime-data github.
Namley:
getdata_APD_crime.RapdCrimeDataClean: non-error rows, with extra columns X, X.1, X.2 removedapdCrimeDataErrors: error rows onlyThe following packages must be loaded:
library(readxl)
library(tidyr)
library(dplyr)
library(leaflet)
library(Hmisc)
library(lubridate)
library(maptools)
library(foreign)
library(here)
First we’ll download the zip file. Commented lines were run once.
# temp <- tempfile(tmpdir = here::here())
# download.file(url = "http://www.atlantapd.org/files/crimedata/COBRA110416.zip"
# ,destfile = here::here("COBRA110416.zip"))
apdCrimeData <- read_excel(here::here("COBRA110416.xlsx")
, sheet="Query")
# unlink(temp)
Let’s check and make sure each column is what we expect it to be:
apdCrimeData %>% str()
## tibble [261,976 × 23] (S3: tbl_df/tbl/data.frame)
## $ MI_PRINX : chr [1:261976] "1160569" "1160570" "1160572" "1160573" ...
## $ offense_id : chr [1:261976] "090360664" "090370891" "091681984" "072692336" ...
## $ rpt_date : chr [1:261976] "02/05/2009" "02/06/2009" "06/17/2009" "02/24/2010" ...
## $ occur_date : chr [1:261976] "02/03/2009" "02/06/2009" "06/17/2009" "02/24/2010" ...
## $ occur_time : chr [1:261976] "13:50:00" "08:50:00" "14:00:00" "23:29:00" ...
## $ poss_date : chr [1:261976] "02/03/2009" "02/06/2009" "06/17/2009" "02/24/2010" ...
## $ poss_time : chr [1:261976] "15:00:00" "10:45:00" "15:00:00" "23:30:00" ...
## $ beat : chr [1:261976] "305" "502" "604" "303" ...
## $ apt_office_prefix: chr [1:261976] NA NA NA NA ...
## $ apt_office_num : chr [1:261976] NA NA "816" NA ...
## $ location : chr [1:261976] "55 MCDONOUGH BLVD SW" "464 ANSLEY WALK TER NW" "375 AUBURN AVE" "600 MARTIN ST" ...
## $ MinOfucr : chr [1:261976] "0670" "0640" "0670" "0420" ...
## $ MinOfibr_code : chr [1:261976] "2308" "2305" "2308" "1315K" ...
## $ dispo_code : chr [1:261976] NA NA NA NA ...
## $ MaxOfnum_victims : chr [1:261976] "1" "1" "1" "1" ...
## $ Shift : chr [1:261976] "Day" "Day" "Day" "Morn" ...
## $ Avg Day : chr [1:261976] "Tue" "Fri" "Wed" "Wed" ...
## $ loc_type : chr [1:261976] "35" "18" NA "26" ...
## $ UC2 Literal : chr [1:261976] "LARCENY-NON VEHICLE" "LARCENY-FROM VEHICLE" "LARCENY-NON VEHICLE" "AGG ASSAULT" ...
## $ neighborhood : chr [1:261976] "South Atlanta" "Ansley Park" "Sweet Auburn" "Pittsburgh" ...
## $ npu : chr [1:261976] "Y" "E" "M" "V" ...
## $ x : chr [1:261976] "-84.386539999999997" "-84.37276" "-84.375209999999996" "-84.394599999999997" ...
## $ y : chr [1:261976] "33.720239999999997" "33.796849999999999" "33.755400000000002" "33.722119999999997" ...
names(apdCrimeData) <- gsub(" ", "_", names(apdCrimeData))
apdCrimeDataTidy <- apdCrimeData %>%
mutate(MI_PRINX = as.numeric(MI_PRINX),
offense_id = as.numeric(offense_id),
rpt_date = lubridate::as_date(rpt_date, format = "%m/%d/%Y"),
occur_date = lubridate::as_date(occur_date, format = "%m/%d/%Y"),
poss_date = lubridate::as_date(poss_date, format = "%m/%d/%Y"),
x = as.numeric(x),
y = as.numeric(y))
Let’s use pieces from codeforatlanta’s function to tidy this data:
errors_horiz_offset <- c(91350923, 91420511, 91471067, 91521689, 101540909,
101701138, 111971638, 112090917, 112411694, 113130827,
113221244, 113270554, 113531411, 113590628, 120230979,
122561142, 130101490, 141621526, 142570818, 151362710)
errors_strange_date <- c(141260924)
errors_all <- c(errors_horiz_offset, errors_strange_date)
apdCrimeDataClean <- apdCrimeDataTidy %>%
filter(!(offense_id %in% errors_all))
apdCrimeDataErrors <- apdCrimeDataTidy %>%
filter(offense_id %in% errors_all)
Let’s download the shapefiles directly from the atlanta crime data site:
# temp <- tempfile(tmpdir = here::here())
# download.file(url = "http://www.atlantapd.org/pdf/crime-data-downloads/1909FAB1-9E7F-4A34-8CDD-142D9DC83E7C.zip"
# ,destfile = here::here("1909FAB1-9E7F-4A34-8CDD-142D9DC83E7C.zip"))
# unlink(temp)
beats <- readShapeSpatial(here::here("1909FAB1-9E7F-4A34-8CDD-142D9DC83E7C","APD-Beats-070116_region.shp"))
## Warning: readShapeSpatial is deprecated; use rgdal::readOGR or sf::st_read
## Warning: readShapePoly is deprecated; use rgdal::readOGR or sf::st_read
zones <- readShapeSpatial(here::here("1909FAB1-9E7F-4A34-8CDD-142D9DC83E7C","APD-Zones-070116_region.shp"))
## Warning: readShapeSpatial is deprecated; use rgdal::readOGR or sf::st_read
## Warning: readShapePoly is deprecated; use rgdal::readOGR or sf::st_read
Now let’s add info from the .dbf files that came with the shapefiles.
beats_dbf <- read.dbf(here::here("1909FAB1-9E7F-4A34-8CDD-142D9DC83E7C","APD-Beats-070116_region.dbf"))
zones_dbf <- read.dbf(here::here("1909FAB1-9E7F-4A34-8CDD-142D9DC83E7C","APD-Zones-070116_region.dbf"))
Let’s filter the data in different ways. leaflet seems unable to handle too many points on a map.
Now let’s plot 100 crimes on a map in leaflet.
# describe(apdCrimeDataClean %>% select(x, y))
apdCrimeDataClean %>%
slice(1:100) %>%
leaflet() %>%
addTiles() %>%
addMarkers(lng = ~x, lat = ~y, popup = ~UC2_Literal)
How many crimes happened in 2016 alone?
apdCrimeDataClean %>%
filter(year(occur_date) == 2016) %>%
nrow()
## [1] 24220
That might be too much for a single leaflet plot. What are the frequencies of all the different types of crime?
apdCrimeDataClean %>%
group_by(UC2_Literal) %>%
summarise(freq = n()) %>%
ungroup()
## # A tibble: 11 × 2
## UC2_Literal freq
## * <chr> <int>
## 1 AGG ASSAULT 18507
## 2 AUTO THEFT 37095
## 3 BURGLARY-NONRES 8186
## 4 BURGLARY-RESIDENCE 41946
## 5 HOMICIDE 692
## 6 LARCENY-FROM VEHICLE 74316
## 7 LARCENY-NON VEHICLE 62641
## 8 RAPE 935
## 9 ROBBERY-COMMERCIAL 1784
## 10 ROBBERY-PEDESTRIAN 14018
## 11 ROBBERY-RESIDENCE 1835
Let’s make a map of only homicides.
apdCrimeDataClean %>%
filter(UC2_Literal=="HOMICIDE") %>%
leaflet() %>%
addTiles() %>%
addCircles(lng = ~x, lat = ~y, popup = ~location, col="red")
Now we’ll try to add shapefiles to our map. Let’s first try adding the beats.
apdCrimeDataClean %>%
filter(UC2_Literal=="HOMICIDE") %>%
leaflet() %>%
addTiles() %>%
addPolygons(data=beats, stroke=FALSE, fillOpacity = 0.5, smoothFactor=0.5,
color="gray") %>%
addPolylines(data=beats, stroke=TRUE, fillOpacity = 0.5, smoothFactor=0.5,
color="black") %>%
addCircles(lng = ~x, lat = ~y, popup = ~location, col="red")
Now add zones.
apdCrimeDataClean %>%
filter(UC2_Literal=="HOMICIDE") %>%
leaflet() %>%
addTiles() %>%
addPolygons(data=zones, stroke=FALSE, fillOpacity = 0.5, smoothFactor=0.5,
color="gray") %>%
addPolylines(data=zones, stroke=TRUE, fillOpacity = 0.5, smoothFactor=0.5,
color="black") %>%
addCircles(lng = ~x, lat = ~y, popup = ~location, col="red")
What if I add both zones and beats?
apdCrimeDataClean %>%
filter(UC2_Literal=="HOMICIDE") %>%
leaflet() %>%
addTiles() %>%
addPolygons(data=zones, stroke=FALSE, fillOpacity = 0.5, smoothFactor=0.5,
color="gray") %>%
addPolylines(data=zones, stroke=TRUE, fillOpacity = 0.5, smoothFactor=0.5,
color="blue") %>%
addPolylines(data=beats, stroke=TRUE, fillOpacity = 0.5, smoothFactor=0.5,
color="red") %>%
addCircles(lng = ~x, lat = ~y, popup = ~location, col="red")
Can we color the beats or zones by total number of crime type?
homicidesByBeat <- apdCrimeDataClean %>%
filter(UC2_Literal=="HOMICIDE") %>%
group_by(beat) %>%
summarise(freq = n()) %>%
ungroup()
# heatCols <- heat.colors(nrow(homicidesByBeat))[cut(sort(homicidesByBeat$freq),nrow(homicidesByBeat))]
pal <- colorNumeric(
# palette = "YlGnBu",
# palette = "RdYlBu",
palette = "YlOrRd",
domain = homicidesByBeat$freq
)
apdCrimeDataClean %>%
# filter(UC2_Literal=="HOMICIDE") %>%
leaflet() %>%
addTiles() %>%
addPolylines(data=beats, stroke=TRUE, fillOpacity = 0.5, smoothFactor=0.5,
color="black") %>%
# addPolygons(data=beats, stroke=FALSE, fillOpacity = 0.5, smoothFactor=0.5,
# fillColor=heatCols) %>%
addPolygons(data=beats, stroke=FALSE, fillOpacity = 0.5, smoothFactor = 0.5,
color = ~pal(homicidesByBeat$freq)) %>%
addLegend("bottomright", pal = pal, values = homicidesByBeat$freq,
title = "Legend",
opacity = 1)
rapeByBeat <- apdCrimeDataClean %>%
filter(UC2_Literal=="RAPE") %>%
group_by(beat) %>%
summarise(freq = n()) %>%
ungroup()
# heatCols <- heat.colors(nrow(rapeByBeat))[cut(sort(rapeByBeat$freq),nrow(rapeByBeat))]
pal <- colorNumeric(
palette = "YlGnBu",
domain = rapeByBeat$freq
)
apdCrimeDataClean %>%
# filter(UC2_Literal=="RAPE") %>%
leaflet() %>%
addTiles() %>%
addPolylines(data=beats, stroke=TRUE, fillOpacity = 0.5, smoothFactor=0.5,
color="black") %>%
# addPolygons(data=beats, stroke=FALSE, fillOpacity = 0.5, smoothFactor=0.5,
# fillColor=heatCols) %>%
addPolygons(data=beats, stroke=FALSE, fillOpacity = 0.5, smoothFactor = 0.5,
color = ~pal(rapeByBeat$freq)) %>%
addLegend("bottomright", pal = pal, values = rapeByBeat$freq,
title = "Legend",
opacity = 1)
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] here_1.0.1 foreign_0.8-80 maptools_1.0-2 sp_1.4-5
## [5] lubridate_1.7.9.2 Hmisc_4.4-2 ggplot2_3.3.5 Formula_1.2-4
## [9] survival_3.2-7 lattice_0.20-41 leaflet_2.0.4.1 dplyr_1.0.3
## [13] tidyr_1.1.2 readxl_1.3.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.5 png_0.1-7 rprojroot_2.0.2
## [4] assertthat_0.2.1 digest_0.6.27 utf8_1.2.2
## [7] R6_2.5.1 cellranger_1.1.0 backports_1.2.1
## [10] evaluate_0.14 pillar_1.6.2 rlang_0.4.11
## [13] rstudioapi_0.13 data.table_1.13.6 rpart_4.1-15
## [16] Matrix_1.2-18 checkmate_2.0.0 rmarkdown_2.6
## [19] splines_4.0.3 stringr_1.4.0 htmlwidgets_1.5.3
## [22] munsell_0.5.0 compiler_4.0.3 xfun_0.20
## [25] pkgconfig_2.0.3 base64enc_0.1-3 rgeos_0.5-5
## [28] htmltools_0.5.2 nnet_7.3-14 tidyselect_1.1.0
## [31] tibble_3.1.4 gridExtra_2.3 htmlTable_2.1.0
## [34] fansi_0.5.0 crayon_1.4.1 withr_2.4.2
## [37] grid_4.0.3 jsonlite_1.7.2 gtable_0.3.0
## [40] lifecycle_1.0.0 DBI_1.1.1 magrittr_2.0.1
## [43] scales_1.1.1 cli_3.0.1 stringi_1.5.3
## [46] farver_2.1.0 latticeExtra_0.6-29 ellipsis_0.3.2
## [49] generics_0.1.0 vctrs_0.3.8 RColorBrewer_1.1-2
## [52] tools_4.0.3 glue_1.4.2 purrr_0.3.4
## [55] crosstalk_1.1.0.1 jpeg_0.1-8.1 fastmap_1.1.0
## [58] yaml_2.2.1 colorspace_2.0-2 cluster_2.1.0
## [61] knitr_1.30